suppressWarnings(suppressPackageStartupMessages({
library(knitr)
library(dplyr)
library(TwoSampleMR)
library(ggplot2)
library(ggthemes)
library(magrittr)
library(ggrepel)
}))
opts_chunk$set(cache=TRUE, echo=TRUE, message=FALSE, warning=FALSE)
threshold1 <- 0.05 / (300000 * 698)
threshold2 <- 0.05 / (300000)
load("../results/mr_disc_repl.rdata")
load("../../05_cis-trans-networks/data/outcomes.rdata")
ao <- ao %>% filter(access != "developer")
ggplot(mr_disc_repl %>% filter(!is.na(z.repl)), aes(y=z.repl, x=z.disc)) +
geom_point(aes(colour=fdr < 0.05)) +
geom_abline(slope=1, linetype="dotted") +
geom_smooth(method="lm") +
ylim(min(mr_disc_repl$z.disc), max(mr_disc_repl$z.disc)) +
labs(x="Z-score primary MR", y="Z-score secondary MR", colour="Replication\nFDR < 0.05")
sub <- subset(mr_disc_repl, abs(b.repl) > 1.5 & abs(b.repl) < 20 | (abs(b.disc) > 10 & b.repl < 0) & b.repl > -50 ) %>%
tidyr::separate(outcome.disc, sep=" \\|", into=c("label", "other"))
ggplot(mr_disc_repl %>% filter(!is.na(z.repl), b.repl > -50), aes(y=b.repl, x=b.disc)) +
geom_point(aes(colour=fdr < 0.05)) +
geom_abline(slope=1, linetype="dotted") +
geom_smooth(method="lm") +
geom_text_repel(data=sub, aes(label=label)) +
labs(x="Z-score primary MR", y="Z-score secondary MR", colour="Replication\nFDR < 0.05")
summary(lm(b.repl ~ b.disc, mr_disc_repl))
##
## Call:
## lm(formula = b.repl ~ b.disc, data = mr_disc_repl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3400 -0.1407 -0.0143 0.1115 4.5271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.002049 0.022043 0.093 0.926
## b.disc 0.162850 0.001456 111.882 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4619 on 438 degrees of freedom
## Multiple R-squared: 0.9662, Adjusted R-squared: 0.9661
## F-statistic: 1.252e+04 on 1 and 438 DF, p-value: < 2.2e-16
summary(lm(b.repl ~ b.disc, mr_disc_repl %>% filter(b.repl > -50)))
##
## Call:
## lm(formula = b.repl ~ b.disc, data = mr_disc_repl %>% filter(b.repl >
## -50))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0564 -0.0644 -0.0101 0.0433 5.0623
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.007226 0.019227 0.376 0.7072
## b.disc 0.022295 0.011990 1.859 0.0636 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4028 on 437 degrees of freedom
## Multiple R-squared: 0.00785, Adjusted R-squared: 0.00558
## F-statistic: 3.458 on 1 and 437 DF, p-value: 0.06363
Are there more small p-values than expected?
binom.test(x=sum(mr_disc_repl$pval.repl < 0.05, na.rm=T), n=nrow(mr_disc_repl), p=0.05)
##
## Exact binomial test
##
## data: sum(mr_disc_repl$pval.repl < 0.05, na.rm = T) and nrow(mr_disc_repl)
## number of successes = 71, number of trials = 440, p-value <
## 2.2e-16
## alternative hypothesis: true probability of success is not equal to 0.05
## 95 percent confidence interval:
## 0.1282295 0.1991290
## sample estimates:
## probability of success
## 0.1613636
What about direction of effect?
binom.test(x=sum(sign(mr_disc_repl$b.disc) == sign(mr_disc_repl$b.repl)), n=nrow(mr_disc_repl), p=0.5)
##
## Exact binomial test
##
## data: sum(sign(mr_disc_repl$b.disc) == sign(mr_disc_repl$b.repl)) and nrow(mr_disc_repl)
## number of successes = 234, number of trials = 440, p-value = 0.198
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
## 0.4839776 0.5792282
## sample estimates:
## probability of success
## 0.5318182
What about direction of effect for just significant assocs?
subset(mr_disc_repl, pval.repl < 0.05) %$% binom.test(x=sum(sign(b.disc) == sign(b.repl)), n=length(pval.repl), p=0.5)
##
## Exact binomial test
##
## data: sum(sign(b.disc) == sign(b.repl)) and length(pval.repl)
## number of successes = 43, number of trials = 71, p-value = 0.09592
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
## 0.4825140 0.7196524
## sample estimates:
## probability of success
## 0.6056338
Is small p-values in repl related to trait type
inner_join(mr_disc_repl, subset(ao, select=c(id, subcategory)), by=c("id.outcome"="id")) %$% summary(lm(z.repl ~ subcategory))
##
## Call:
## lm(formula = z.repl ~ subcategory)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.4025 -0.8777 -0.0020 0.8897 13.9125
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02409 0.17531 0.137 0.89078
## subcategoryAutoimmune / inflammatory 0.10602 0.22659 0.468 0.64010
## subcategoryBone -0.34401 1.32355 -0.260 0.79505
## subcategoryCardiovascular -0.60537 1.08540 -0.558 0.57732
## subcategoryDiabetes -1.18981 1.08540 -1.096 0.27362
## subcategoryEducation 0.04921 0.77744 0.063 0.94956
## subcategoryGlycemic -0.53017 0.94406 -0.562 0.57469
## subcategoryHaemotological -0.25413 0.29368 -0.865 0.38735
## subcategoryHemodynamic -0.36184 1.32355 -0.273 0.78469
## subcategoryKidney 0.50155 1.32355 0.379 0.70492
## subcategoryLipid -0.01673 0.33284 -0.050 0.95993
## subcategoryMetal -0.09363 0.94406 -0.099 0.92104
## subcategoryPsychiatric / neurological -1.41153 0.44118 -3.199 0.00148
## subcategoryReproductive aging -0.19332 0.67897 -0.285 0.77599
##
## (Intercept)
## subcategoryAutoimmune / inflammatory
## subcategoryBone
## subcategoryCardiovascular
## subcategoryDiabetes
## subcategoryEducation
## subcategoryGlycemic
## subcategoryHaemotological
## subcategoryHemodynamic
## subcategoryKidney
## subcategoryLipid
## subcategoryMetal
## subcategoryPsychiatric / neurological **
## subcategoryReproductive aging
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.855 on 425 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.0345, Adjusted R-squared: 0.004968
## F-statistic: 1.168 on 13 and 425 DF, p-value: 0.3003
inner_join(mr_disc_repl, subset(ao, select=c(id, subcategory)), by=c("id.outcome"="id")) %>% mutate(sig=pval.repl < 0.05) %$% summary(glm(sig ~ subcategory))
##
## Call:
## glm(formula = sig ~ subcategory)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.50000 -0.17262 -0.14970 -0.09302 0.90698
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.17857 0.03483 5.127 4.48e-07
## subcategoryAutoimmune / inflammatory -0.02887 0.04502 -0.641 0.5217
## subcategoryBone -0.17857 0.26296 -0.679 0.4974
## subcategoryCardiovascular 0.15476 0.21564 0.718 0.4733
## subcategoryDiabetes -0.17857 0.21564 -0.828 0.4081
## subcategoryEducation -0.01190 0.15446 -0.077 0.9386
## subcategoryGlycemic -0.17857 0.18756 -0.952 0.3416
## subcategoryHaemotological -0.01728 0.05835 -0.296 0.7672
## subcategoryHemodynamic -0.17857 0.26296 -0.679 0.4974
## subcategoryKidney -0.17857 0.26296 -0.679 0.4974
## subcategoryLipid -0.08555 0.06613 -1.294 0.1965
## subcategoryMetal 0.32143 0.18756 1.714 0.0873
## subcategoryPsychiatric / neurological 0.15476 0.08765 1.766 0.0782
## subcategoryReproductive aging -0.05357 0.13489 -0.397 0.6915
##
## (Intercept) ***
## subcategoryAutoimmune / inflammatory
## subcategoryBone
## subcategoryCardiovascular
## subcategoryDiabetes
## subcategoryEducation
## subcategoryGlycemic
## subcategoryHaemotological
## subcategoryHemodynamic
## subcategoryKidney
## subcategoryLipid
## subcategoryMetal .
## subcategoryPsychiatric / neurological .
## subcategoryReproductive aging
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1358652)
##
## Null deviance: 59.517 on 438 degrees of freedom
## Residual deviance: 57.743 on 425 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 385.32
##
## Number of Fisher Scoring iterations: 2
inner_join(mr_disc_repl, subset(ao, select=c(id, subcategory)), by=c("id.outcome"="id")) %>% mutate(sig=pval.repl < 0.05, blood = subcategory %in% c("Haemotological", "Metal", "Protein")) %$% summary(glm(sig ~ blood))
##
## Call:
## glm(formula = sig ~ blood)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.1818 -0.1582 -0.1582 -0.1582 0.8418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.15818 0.01910 8.28 1.51e-15 ***
## bloodTRUE 0.02364 0.04927 0.48 0.632
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.136123)
##
## Null deviance: 59.517 on 438 degrees of freedom
## Residual deviance: 59.486 on 437 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 374.37
##
## Number of Fisher Scoring iterations: 2
Which associations appear to be working?
mr_disc_repl$fdr <- p.adjust(mr_disc_repl$pval.repl, "fdr")
mr_disc_repl$concordant <- sign(mr_disc_repl$b.repl) == sign(mr_disc_repl$b.disc)
subset(mr_disc_repl, fdr < 0.05 & concordant)
## id.exposure.disc id.outcome outcome.disc
## 28 3PlZT3 89 Height || id:89
## 142 efgDzZ 30 Crohn's disease || id:30
## 144 egeEio 288 Systemic lupus erythematosus || id:288
## 170 gagCNW 288 Systemic lupus erythematosus || id:288
## 171 gdPW6i 89 Height || id:89
## 172 Gi2MaJ 31 Inflammatory bowel disease || id:31
## 174 gi6WQc 1004 Age at menopause || id:1004
## 179 GvMT1T 89 Height || id:89
## 199 InWOdR 89 Height || id:89
## 228 KCSSjH 89 Height || id:89
## 245 L8b1mm 22 Schizophrenia || id:22
## 250 lnuS95 302 Triglycerides || id:302
## 297 nUzS4H 288 Systemic lupus erythematosus || id:288
## 314 oMhO90 89 Height || id:89
## 360 Sj8U1Q 22 Schizophrenia || id:22
## 429 VqZYFG 833 Rheumatoid arthritis || id:833
## 432 vS7Pql 288 Systemic lupus erythematosus || id:288
## 442 wHkrOx 22 Schizophrenia || id:22
## exposure method.disc nsnp.disc b.disc se.disc
## 28 cg09140281 Wald ratio 1 -0.34818193 0.032770064
## 142 cg21815063 Wald ratio 1 1.60299695 0.183137484
## 144 cg21685006 Wald ratio 1 -10.16842077 0.437970971
## 170 cg09198277 Wald ratio 1 3.84448131 0.576116960
## 171 cg16205897 Wald ratio 1 -0.05151995 0.007985593
## 172 cg24173182 Wald ratio 1 0.83077806 0.127757494
## 174 cg26170066 Wald ratio 1 -1.54908135 0.172120150
## 179 cg23891487 Wald ratio 1 0.60939718 0.049859769
## 199 cg14196790 Wald ratio 1 0.09185566 0.010298968
## 228 cg20135002 Wald ratio 1 0.11570999 0.015427999
## 245 cg13127506 Wald ratio 1 -0.32087002 0.049854803
## 250 cg13598010 Wald ratio 1 -0.52820358 0.041549937
## 297 cg21342636 Wald ratio 1 -1.51322718 0.120745331
## 314 cg05389922 Wald ratio 1 0.22654627 0.009665974
## 360 cg02207219 Wald ratio 1 -1.64419351 0.163951659
## 429 cg10636959 Wald ratio 1 1.01457989 0.150001749
## 432 cg19972273 Wald ratio 1 2.93002580 0.359355666
## 442 cg09012858 Wald ratio 1 -0.87055635 0.120905513
## pval.disc id.exposure.repl outcome.repl
## 28 2.280122e-26 3PlZT3 Height || id:89
## 142 2.078095e-18 iIZEzg Crohn's disease || id:30
## 144 3.058642e-119 6JDJQj Systemic lupus erythematosus || id:288
## 170 2.504698e-11 gagCNW Systemic lupus erythematosus || id:288
## 171 1.106660e-10 gdPW6i Height || id:89
## 172 7.885243e-11 Gi2MaJ Inflammatory bowel disease || id:31
## 174 2.257177e-19 78Z5u1 Age at menopause || id:1004
## 179 2.365294e-34 x6P4Zf Height || id:89
## 199 4.708619e-19 InWOdR Height || id:89
## 228 6.381783e-14 KCSSjH Height || id:89
## 245 1.225900e-10 L8b1mm Schizophrenia || id:22
## 250 5.039709e-37 lnuS95 Triglycerides || id:302
## 297 4.964581e-36 nUzS4H Systemic lupus erythematosus || id:288
## 314 1.772963e-121 jGgxyS Height || id:89
## 360 1.142085e-23 Sj8U1Q Schizophrenia || id:22
## 429 1.344306e-11 VqZYFG Rheumatoid arthritis || id:833
## 432 3.533826e-16 vS7Pql Systemic lupus erythematosus || id:288
## 442 6.007882e-13 bdNGRD Schizophrenia || id:22
## method.repl nsnp.repl b.repl se.repl pval.repl z.disc
## 28 Wald ratio 1 -0.18638449 0.048459967 1.199864e-04 -10.625000
## 142 Wald ratio 1 0.51884832 0.148829372 4.899424e-04 8.752970
## 144 Wald ratio 1 -0.78729726 0.206858802 1.412526e-04 -23.217111
## 170 Wald ratio 1 5.15523076 0.367112890 8.547957e-45 6.673092
## 171 Wald ratio 1 -0.06202405 0.008090093 1.765237e-14 -6.451613
## 172 Wald ratio 1 0.57397314 0.156662360 2.485406e-04 6.502774
## 174 Wald ratio 1 -0.49206625 0.164022084 2.699796e-03 -9.000000
## 179 Wald ratio 1 0.08820080 0.027210884 1.189528e-03 12.222222
## 199 Wald ratio 1 0.03668253 0.008803807 3.090859e-05 8.918919
## 228 Wald ratio 1 0.04266806 0.008031635 1.081314e-07 7.500000
## 245 Wald ratio 1 -0.33816315 0.079766439 2.241016e-05 -6.436090
## 250 Wald ratio 1 -0.18336038 0.043657234 2.669150e-05 -12.712500
## 297 Wald ratio 1 -1.89876058 0.284147449 2.352091e-11 -12.532387
## 314 Wald ratio 1 0.08613678 0.019073144 6.298030e-06 23.437500
## 360 Wald ratio 1 -0.93899660 0.114075988 1.851529e-16 -10.028526
## 429 Wald ratio 1 0.79337482 0.135795364 5.144501e-09 6.763787
## 432 Wald ratio 1 4.05716406 0.731101941 2.866809e-08 8.153554
## 442 Wald ratio 1 -0.42167233 0.044308118 1.785615e-21 -7.200303
## z.repl code z.disc2 z.repl2 fdr concordant
## 28 -3.846154 89 cg09140281 10.625000 3.846154 2.772318e-03 TRUE
## 142 3.486196 30 cg21815063 8.752970 3.486196 9.776578e-03 TRUE
## 144 -3.805965 288 cg21685006 23.217111 3.805965 3.100495e-03 TRUE
## 170 14.042631 288 cg09198277 6.673092 14.042631 3.752553e-42 TRUE
## 171 -7.666667 89 cg16205897 6.451613 7.666667 1.549878e-12 TRUE
## 172 3.663759 31 cg24173182 6.502774 3.663759 5.195682e-03 TRUE
## 174 -3.000000 1004 cg26170066 9.000000 3.000000 3.950702e-02 TRUE
## 179 3.241379 89 cg23891487 12.222222 3.241379 2.008472e-02 TRUE
## 199 4.166667 89 cg14196790 8.918919 4.166667 8.480545e-04 TRUE
## 228 5.312500 89 cg20135002 7.500000 5.312500 4.315427e-06 TRUE
## 245 -4.239416 22 cg13127506 6.436090 4.239416 7.027187e-04 TRUE
## 250 -4.200000 302 cg13598010 12.712500 4.200000 7.811712e-04 TRUE
## 297 -6.682307 288 cg21342636 12.532387 6.682307 1.720947e-09 TRUE
## 314 4.516129 89 cg05389922 23.437500 4.516129 2.126796e-04 TRUE
## 360 -8.231326 22 cg02207219 10.028526 8.231326 2.032054e-14 TRUE
## 429 5.842429 833 cg10636959 6.763787 5.842429 3.226337e-07 TRUE
## 432 5.549382 288 cg19972273 8.153554 5.549382 1.398366e-06 TRUE
## 442 -9.516819 22 cg09012858 7.200303 9.516819 3.919425e-19 TRUE
subset(mr_disc_repl, fdr < 0.05 & !concordant)
## id.exposure.disc id.outcome outcome.disc
## 11 1GUauY 273 Mean cell volume || id:273
## 16 1zjnhP 302 Triglycerides || id:302
## 17 2CGr2i 833 Rheumatoid arthritis || id:833
## 136 dRg6Vv 22 Schizophrenia || id:22
## 164 fhqQsd 89 Height || id:89
## 167 Fv8Jey 1054 Gout || id:1054
## 192 I8U5cS 30 Crohn's disease || id:30
## 193 I8U5cS 31 Inflammatory bowel disease || id:31
## 277 N4u6Ql 89 Height || id:89
## 351 Rlql9S 288 Systemic lupus erythematosus || id:288
## 375 tnuBQA 89 Height || id:89
## 498 yYf8g3 89 Height || id:89
## exposure method.disc nsnp.disc b.disc se.disc pval.disc
## 11 cg14026106 Wald ratio 1 -0.9837641 0.15379763 1.589951e-10
## 16 cg12966376 Wald ratio 1 -0.1995959 0.02724859 2.388987e-13
## 17 cg07790778 Wald ratio 1 1.2599408 0.16651908 3.838711e-14
## 136 cg06507285 Wald ratio 1 0.5529986 0.08689836 1.968995e-10
## 164 cg07097417 Wald ratio 1 0.1526297 0.02081314 2.244976e-13
## 167 cg24973591 Wald ratio 1 2.0543448 0.27117352 3.570380e-14
## 192 cg18608055 Wald ratio 1 -1.1049519 0.16432633 1.766422e-11
## 193 cg18608055 Wald ratio 1 -0.8349481 0.12566131 3.043864e-11
## 277 cg15270729 Wald ratio 1 -0.1085069 0.01345485 7.352650e-16
## 351 cg22097941 Wald ratio 1 -2.1154677 0.29173200 4.124620e-13
## 375 cg04606053 Wald ratio 1 0.1793291 0.02758910 8.032001e-11
## 498 cg23760103 Wald ratio 1 0.2046989 0.01998251 1.260473e-24
## id.exposure.repl outcome.repl method.repl
## 11 1GUauY Mean cell volume || id:273 Wald ratio
## 16 1zjnhP Triglycerides || id:302 Wald ratio
## 17 2CGr2i Rheumatoid arthritis || id:833 Wald ratio
## 136 NxskkE Schizophrenia || id:22 Wald ratio
## 164 54AdQv Height || id:89 Wald ratio
## 167 Fv8Jey Gout || id:1054 Wald ratio
## 192 u0Pbre Crohn's disease || id:30 Wald ratio
## 193 u0Pbre Inflammatory bowel disease || id:31 Wald ratio
## 277 Yy7qca Height || id:89 Wald ratio
## 351 Rlql9S Systemic lupus erythematosus || id:288 Wald ratio
## 375 I0Cr3X Height || id:89 Wald ratio
## 498 yYf8g3 Height || id:89 Wald ratio
## nsnp.repl b.repl se.repl pval.repl z.disc z.repl
## 11 1 0.9803579 0.29081896 7.488913e-04 -6.396484 3.371025
## 16 1 0.2598365 0.06348278 4.257850e-05 -7.325000 4.093023
## 17 1 -0.8475167 0.26030234 1.130362e-03 7.566345 -3.255893
## 136 1 -0.2889436 0.09542444 2.461915e-03 6.363740 -3.027984
## 164 1 -0.0896861 0.02790234 1.307695e-03 7.333333 -3.214286
## 167 1 -2.0033987 0.37711034 1.081314e-07 7.575758 -5.312500
## 192 1 0.4968059 0.14941030 8.838321e-04 -6.724132 3.325111
## 193 1 0.4469365 0.10969364 4.613178e-05 -6.644433 4.074407
## 277 1 0.3239691 0.06633653 1.041024e-06 -8.064516 4.883721
## 351 1 0.6821828 0.21344427 1.393133e-03 -7.251408 3.196070
## 375 1 -0.1777637 0.02121696 5.366189e-17 6.500000 -8.378378
## 498 1 -0.2094551 0.03716138 1.736784e-08 10.243902 -5.636364
## code z.disc2 z.repl2 fdr concordant
## 11 273 cg14026106 6.396484 -3.371025 1.429406e-02 FALSE
## 16 302 cg12966376 7.325000 -4.093023 1.099527e-03 FALSE
## 17 833 cg07790778 7.566345 -3.255893 1.984915e-02 FALSE
## 136 22 cg06507285 6.363740 -3.027984 3.726830e-02 FALSE
## 164 89 cg07097417 7.333333 -3.214286 2.126215e-02 FALSE
## 167 1054 cg24973591 7.575758 -5.312500 4.315427e-06 FALSE
## 192 30 cg18608055 6.724132 -3.325111 1.616676e-02 FALSE
## 193 31 cg18608055 6.644433 -4.074407 1.125103e-03 FALSE
## 277 89 cg15270729 8.064516 -4.883721 3.808412e-05 FALSE
## 351 288 cg22097941 7.251408 -3.196070 2.184233e-02 FALSE
## 375 89 cg04606053 6.500000 -8.378378 7.852524e-15 FALSE
## 498 89 cg23760103 10.243902 -5.636364 9.530605e-07 FALSE
Schizophrenia has some consistent results but looking at all of them together is less convincing
ggplot(mr_disc_repl %>% filter(id.outcome == 22), aes(y=z.repl, x=z.disc)) +
geom_point(aes(colour=pval.repl < 0.05)) +
geom_abline(slope=1, linetype="dotted") +
geom_smooth(method="lm") +
ylim(min(mr_disc_repl$z.disc), max(mr_disc_repl$z.disc)) +
labs(x="Z-score primary MR", y="Z-score secondary MR", colour="Replication\np-value < 0.05")
LDL cholesterol
ggplot(mr_disc_repl %>% filter(id.outcome == 300), aes(y=b.repl, x=b.disc)) +
geom_point(aes(colour=pval.repl < 0.05)) +
geom_abline(slope=1, linetype="dotted") +
geom_smooth(method="lm")
# ylim(min(mr_disc_repl$z.disc), max(mr_disc_repl$z.disc)) +
labs(x="Z-score primary MR", y="Z-score secondary MR", colour="Replication\np-value < 0.05")
## $x
## [1] "Z-score primary MR"
##
## $y
## [1] "Z-score secondary MR"
##
## $colour
## [1] "Replication\np-value < 0.05"
##
## attr(,"class")
## [1] "labels"
Systemic lupus erythematosus
ggplot(mr_disc_repl %>% filter(id.outcome == 288), aes(y=b.repl, x=b.disc)) +
geom_point(aes(colour=pval.repl < 0.05)) +
geom_abline(slope=1, linetype="dotted") +
geom_smooth(method="lm")
# ylim(min(mr_disc_repl$z.disc), max(mr_disc_repl$z.disc)) +
labs(x="Z-score primary MR", y="Z-score secondary MR", colour="Replication\np-value < 0.05")
## $x
## [1] "Z-score primary MR"
##
## $y
## [1] "Z-score secondary MR"
##
## $colour
## [1] "Replication\np-value < 0.05"
##
## attr(,"class")
## [1] "labels"
Get the comparisons for all traits for which there is at least one replication with FDR < 0.05
trait_list <- unique(subset(mr_disc_repl, pval.repl < 0.05)$id.outcome)
subset(mr_disc_repl, id.outcome %in% trait_list) %>% group_by(outcome.disc) %>% summarise(n=n()) %>% kable
| outcome.disc | n |
|---|---|
| Rheumatoid arthritis || id:833 | 12 |
| Years of schooling || id:1001 | 6 |
| Schizophrenia || id:22 | 20 |
| Height || id:89 | 61 |
| Crohn’s disease || id:30 | 53 |
| Birth weight || id:1084 | 15 |
| Inflammatory bowel disease || id:31 | 45 |
| Mean platelet volume || id:1006 | 3 |
| Ulcerative colitis || id:32 | 26 |
| Systemic lupus erythematosus || id:288 | 22 |
| Transferrin Saturation || id:1051 | 2 |
| Mean cell haemoglobin || id:271 | 20 |
| Extreme height || id:86 | 8 |
| Mean cell volume || id:273 | 20 |
| LDL cholesterol || id:300 | 7 |
| Myocardial infarction || id:798 | 2 |
| Triglycerides || id:302 | 12 |
| Age at menopause || id:1004 | 5 |
| Red blood cell count || id:275 | 17 |
| Transferrin || id:1052 | 1 |
| Gout || id:1054 | 1 |
Plot them
for(i in 1:length(trait_list))
{
temp <- mr_disc_repl %>% filter(id.outcome == trait_list[i])
temp$hla_disc <- FALSE
temp$hla_repl <- FALSE
for(j in 1:nrow(temp))
{
temp2 <- subset(discovery, id.outcome == trait_list[i] & exposure == temp$exposure[j])
temp3 <- strsplit(temp2$SNP, split=":")[[1]]
temp$hla_disc[j] <- temp3[1] == "chr6" & as.numeric(temp3[2]) > 24570005 & as.numeric(temp3[2]) < 38377657
temp2 <- subset(replication, id.outcome == trait_list[i] & exposure == temp$exposure[j])
temp3 <- strsplit(temp2$SNP, split=":")[[1]]
temp$hla_repl[j] <- temp3[1] == "chr6" & as.numeric(temp3[2]) > 24570005 & as.numeric(temp3[2]) < 38377657
}
p1 <- ggplot(temp, aes(y=b.repl, x=b.disc)) +
geom_point(aes(colour=pval.repl < 0.05, size=hla_disc, shape=hla_repl)) +
geom_abline(slope=1, linetype="dotted") +
geom_errorbar(width=0, color="grey", aes(ymin=b.repl-1.96*se.repl, ymax=b.repl+1.96*se.repl)) +
geom_errorbarh(height=0, color="grey", aes(xmin=b.disc-1.96*se.disc, xmax=b.disc+1.96*se.disc)) +
geom_smooth(method="lm") +
labs(x="Z-score primary MR", y="Z-score secondary MR", colour="Replication\np-value < 0.05", title = unique(subset(mr_disc_repl, id.outcome == trait_list[i])$outcome.disc))
print(p1)
}
Create table
out <- mr_disc_repl %$% data_frame(
cpg = exposure,
outcome = outcome.disc,
beta.primary = b.disc,
se.primary = se.disc,
pval.primary = pval.disc,
beta.secondary = b.repl,
se.secondary = se.repl,
pval.secondary = pval.repl
)
write.csv(out, file="../results/mr_primary_secondary.csv")